*******************************************************
* .do-file to adjust variables ahead of final analysis
*******************************************************

clear all
set maxvar 7000

use "$psidwkddata/famind_combined.dta", clear

// ==================================================
// Drop the SEO, immigrant and Latino samples
// ==================================================

// The subsamples are removed through ranges of famid68. The ranges share
// their endpoints, so taken together they remove every value in 3000-9309.
// Observations with a missing famid68 are never matched by inrange().
drop if inrange(famid68, 5000, 7000)   // SEO sample
drop if inrange(famid68, 3000, 5000)   // Immigrant sample
drop if inrange(famid68, 7000, 9309)   // Latino sample

// ==================================================
// Identify who is current head and spouse in the FU
// ==================================================

// sequnum > 20 refers to individuals not present in the FU (moved out, died,
// in institutions etc.). For them, blank out the codes for reference person
// (10), legal spouse (20) and partner/cohabitor (22).
replace rel_ref = . if inlist(rel_ref, 10, 20, 22) & sequnum > 20

// Flag individuals who are reference person or spouse/partner AND present in
// the FU. Both flags take the value 1 or missing (never 0).
gen ref    = 1 if rel_ref == 10            & sequnum <= 20
gen spouse = 1 if inlist(rel_ref, 20, 22)  & sequnum <= 20

// Drop all observations that were in the individual file but are not living
// in the FU
drop if rel_ref == . | rel_ref == 0

drop if sequnum == 0
drop if inrange(sequnum, 51, 59)   // in institutions
drop if inrange(sequnum, 71, 80)   // moved out since last interview
drop if inrange(sequnum, 81, 89)   // died since last interview

// ===============================================
// Deflate financial variables (in 2007 dollars)
// ===============================================

// Attach the CPI to every observation by wave.
//  - The path uses a forward slash: Stata accepts it on every operating
//    system (a backslash only works on Windows) and it matches the other
//    paths used in this file.
//  - keep(master match) discards CPI rows for waves not available in the
//    PSID (formerly: drop if _merge == 2).
//  - nogenerate suppresses the _merge indicator (formerly: drop _merge).
merge m:1 wave using "$cpipath/cpi_estimates.dta", keep(master match) nogenerate

// Variables to deflate; wildcards are expanded once by -foreach ... of varlist-
global list finwealth wealth* amountstock amtsavings totfaminc homequ inc*

// For each variable: compute the deflated value, keep the nominal series
// under <name>_nom, and let the deflated series take over the original name.
foreach var of varlist $list {
    gen `var'_real = `var' / CPI
    rename `var' `var'_nom
    rename `var'_real `var'
}

// ========================================================
// Definition of singles and couples
// ========================================================

// DK/NA replies in the marital status of the reference person become missing
replace martgen = . if martgen > 5

// By individual: recode marital status as dummy (including cohabiting).
//   1 = part of couple, 0 = not part of couple (within FU)
// Stata treats missing values as larger than any number, so the condition
// must exclude them explicitly; otherwise individuals with unknown marital
// status would be recoded as "part of couple" and the drop below could never
// find anything. Values equal to 0 already have the desired code and need no
// recoding.
replace martind = 1 if martind >= 1 & !missing(martind)

// Drop all observations for which marital status is not observed
drop if missing(martind)

// Single by gender: for reference persons only
gen      single = 0 if martind == 0 & sexind == 1 & ref == 1   // single men
replace  single = 1 if martind == 0 & sexind == 2 & ref == 1   // single women

// Crosscheck with marital status of reference person
tab martgen if single == 1 & ref == 1
tab martgen if single == 0 & ref == 1
tab martgen if single == . & ref == 1

// Some reference persons are labeled as having a spouse in the individual
// file, but as single in the family file --> drop.
// !missing(martgen) keeps reference persons whose family-file marital status
// is unknown (DK/NA, set to missing above): "martgen > 1" alone would be true
// for them as well, although they are not recorded as single.
drop if single == . & martgen > 1 & !missing(martgen) & ref == 1

// Couple by gender: married (or permanently cohabiting) and either reference
// person or spouse. couple = 0 for men, 1 for women, missing otherwise.
gen couple = (sexind == 2) if martind == 1 & inlist(sexind, 1, 2) & (ref == 1 | spouse == 1)

// Regardless of gender: 1 for every coupled reference person or spouse
gen coupleboth = 1 if martind == 1 & (ref == 1 | spouse == 1)

tab single if ref == 1, m
tab coupleboth if ref == 1 | spouse == 1, m

// Family type indicator:
//   0 = single men, 1 = single women, 2 = couples, missing = none of these.
// single already carries the codes 0 and 1 (or missing), so start from it and
// overwrite with 2 for members of a couple.
gen     fam_type = single
replace fam_type = 2 if coupleboth == 1



// ===================================================
// Data cleaning
// ===================================================

// The education variable has been recoded in 1990, so waves up to 1990 and
// later waves are treated separately.

// Waves after 1990: set DK/NA and no grades of school (codes 99 and 0) to missing
replace educref = . if inlist(educref, 0, 99) & wave > 1990
replace educsp  = . if inlist(educsp,  0, 99) & wave > 1990

// Waves up to 1990: code 9 is set to missing
replace educref = . if educref == 9 & wave <= 1990
replace educsp  = . if educsp  == 9 & wave <= 1990

// Create categorical variable (more or less than 12 years of schooling).
// The cut-off is 12 in waves after 1990 and code 4 in waves up to 1990.
// The dummy is 1 above the cut-off, 0 at or below it, and stays missing
// when education is missing (.).
gen     educcat = (educref > 12) if educref != . & wave > 1990
replace educcat = (educref > 4)  if educref != . & wave <= 1990

gen     educcatsp = (educsp > 12) if educsp != . & wave > 1990
replace educcatsp = (educsp > 4)  if educsp != . & wave <= 1990


// Education of the individual (not by reference person/spouse):
// codes 0 and 99 are set to missing, then the same 12-year split is applied
replace educind = . if inlist(educind, 0, 99)

gen educind_cat = (educind > 12) if educind != .

// Age variables: recode non-responses (999) to missing
foreach agevar of varlist ageref agesp ageind {
    replace `agevar' = . if `agevar' == 999
}

// Birth year (from individual file): 0 and 9999 are set to missing
replace birthyear = . if inlist(birthyear, 0, 9999)

// Birth year of the reference person, spread to every member of the family
// within a wave. Only rows with ref == 1 contribute to the maximum; the
// result is missing if no such row has a birth year.
bysort wave famid: egen byref = max(cond(ref == 1, birthyear, .))

// Birth year of the spouse, spread to every member of the family in the same way
bysort wave famid: egen bysp = max(cond(spouse == 1, birthyear, .))

// Average birth year of reference person and spouse: singles take the
// reference person's birth year, couples the mean of both birth years
gen     byav = byref               if single != .
replace byav = (byref + bysp) / 2  if couple != .

// ===================================================
// Working age population
// ===================================================

// Average age of reference person and spouse; when agesp is 0 the age of the
// reference person is used on its own. A missing agesp yields a missing ageav.
gen ageav = cond(agesp == 0, ageref, (ageref + agesp) / 2)

// Keep average ages between 30 and 65 (inclusive); observations with a
// missing ageav are removed as well
keep if inrange(ageav, 30, 65)

// ===================================================
// Keep only spouse and reference person
// ===================================================
// ref and spouse are either 1 or missing, so this removes exactly the
// observations where both flags are missing
keep if ref == 1 | spouse == 1

save "$psidwkddata/famind_combined_adj_sing.dta", replace